{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "mAGKZvMaBmWe",
    "outputId": "96aacb80-bb4c-4d0e-fce1-2862c223ce15"
   },
   "source": [
    "# How to run calculations over text data?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "tmBHvzszAEN-"
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "from sklearn import datasets, ensemble, model_selection"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "-Xko20Q1FvZV"
   },
   "outputs": [],
   "source": [
    "from evidently import ColumnMapping\n",
    "from evidently.report import Report\n",
    "from evidently.test_suite import TestSuite\n",
    "\n",
    "from evidently.metric_preset import DataDriftPreset\n",
    "from evidently.metric_preset import DataQualityPreset\n",
    "from evidently.metric_preset import RegressionPreset\n",
    "from evidently.metric_preset import ClassificationPreset\n",
    "from evidently.metric_preset import TargetDriftPreset\n",
    "from evidently.metric_preset import TextOverviewPreset\n",
    "\n",
    "from evidently.metrics import *\n",
    "\n",
    "from evidently.test_preset import NoTargetPerformanceTestPreset\n",
    "from evidently.test_preset import DataStabilityTestPreset\n",
    "from evidently.test_preset import DataQualityTestPreset\n",
    "from evidently.test_preset import DataDriftTestPreset\n",
    "\n",
    "from evidently.tests import *\n",
    "\n",
    "from evidently.tests.base_test import generate_column_tests\n",
    "from evidently.metrics.base_metric import generate_column_metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "HmpgBJFqH7y-",
    "outputId": "58c188d4-d810-4ce9-a806-9d3a033309c8"
   },
   "outputs": [],
   "source": [
    "import nltk\n",
    "nltk.download('words')\n",
    "nltk.download('wordnet')\n",
    "nltk.download('omw-1.4')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Y_EV_iCeCiz1"
   },
   "outputs": [],
   "source": [
    "# Dataset for Data Quality and Integrity.\n",
    "# as_frame=True asks scikit-learn to always return the data as a pandas DataFrame in `.frame`;\n",
    "# with 'auto' that conversion is skipped for sparse datasets, and the cells below need a DataFrame.\n",
    "reviews_data = datasets.fetch_openml(\n",
    "    name='Womens-E-Commerce-Clothing-Reviews',\n",
    "    version=2,\n",
    "    as_frame=True,\n",
    ")\n",
    "reviews = reviews_data.frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "YWyVOGsM2gV7"
   },
   "outputs": [],
   "source": [
    "# Copy the true rating into a 'prediction' column so the presets that need a prediction have one.\n",
    "reviews['prediction'] = reviews['Rating']\n",
    "\n",
    "# Reference set: ratings above 3. Current set: ratings below 3.\n",
    "# Both are drawn with replacement using the same fixed seed, so re-running gives the same rows.\n",
    "sample_kwargs = dict(n=5000, replace=True, ignore_index=True, random_state=42)\n",
    "high_rated = reviews[reviews.Rating > 3]\n",
    "low_rated = reviews[reviews.Rating < 3]\n",
    "reviews_ref = high_rated.sample(**sample_kwargs)\n",
    "reviews_cur = low_rated.sample(**sample_kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 206
    },
    "id": "1lUgz2JlAheU",
    "outputId": "4202acf4-3c91-4809-81a6-fd5fc72430a2"
   },
   "outputs": [],
   "source": [
    "reviews.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "60Wxndq7FzGT"
   },
   "outputs": [],
   "source": [
    "column_mapping = ColumnMapping(\n",
    "    target='Rating',\n",
    "    numerical_features=['Age', 'Positive_Feedback_Count'],\n",
    "    categorical_features=['Division_Name', 'Department_Name', 'Class_Name'],\n",
    "    text_features=['Review_Text', 'Title']\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "DGMq9vbvy15y"
   },
   "source": [
    "# Metric Presets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "id": "oWWVjRO4Cesf",
    "outputId": "513c4d3d-1497-487b-8b46-e2c89e2f6b2e"
   },
   "outputs": [],
   "source": [
    "data_drift_report = Report(metrics=[\n",
    "    DataDriftPreset(num_stattest='ks', cat_stattest='psi', num_stattest_threshold=0.2, cat_stattest_threshold=0.2),\n",
    "])\n",
    "\n",
    "data_drift_report.run(reference_data=reviews_ref, current_data=reviews_cur, column_mapping=column_mapping)\n",
    "data_drift_report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 104
    },
    "id": "oeDoTQ_rW7K9",
    "outputId": "0ab1875c-73e6-4d9c-cf3a-ad2cd4b930a3"
   },
   "outputs": [],
   "source": [
    "data_drift_report.json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "id": "VEe4LnGxtKsp",
    "outputId": "68a68c47-8011-4c61-cbf7-48641925c861"
   },
   "outputs": [],
   "source": [
    "data_quality_report = Report(metrics=[\n",
    "    DataQualityPreset()\n",
    "])\n",
    "\n",
    "data_quality_report.run(reference_data=reviews_ref, current_data=reviews_cur, column_mapping=column_mapping)\n",
    "data_quality_report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "Mm1le3C_Xat4",
    "outputId": "b8fab152-a224-4030-c131-3457644a53d9"
   },
   "outputs": [],
   "source": [
    "data_quality_report.as_dict()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 104
    },
    "id": "AqEb7BFPY4nu",
    "outputId": "6a5b5fad-c413-48cf-b874-2fb8ad6aa526"
   },
   "outputs": [],
   "source": [
    "data_quality_report.json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "id": "4eDJzmKq020I",
    "outputId": "98d80c9c-19b5-4037-8a8f-47bd1db0c71c"
   },
   "outputs": [],
   "source": [
    "target_drift_report = Report(metrics=[\n",
    "    TargetDriftPreset()\n",
    "])\n",
    "\n",
    "target_drift_report.run(reference_data=reviews_ref, current_data=reviews_cur, column_mapping=column_mapping)\n",
    "target_drift_report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 104
    },
    "id": "arXlC4vwb1dW",
    "outputId": "7d69bf82-4dce-4fb6-b6aa-b2c6838d0f04"
   },
   "outputs": [],
   "source": [
    "target_drift_report.json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "id": "Ar-1WPAt1pSm",
    "outputId": "91ed2aff-3821-4660-826f-ae6bcbc40f3f"
   },
   "outputs": [],
   "source": [
    "regression_report = Report(metrics=[\n",
    "    RegressionPreset()\n",
    "])\n",
    "\n",
    "regression_report.run(reference_data=reviews_ref, current_data=reviews_cur, column_mapping=ColumnMapping(\n",
    "    target='Rating',\n",
    "    prediction='prediction',\n",
    "    numerical_features=['Age', 'Positive_Feedback_Count'],\n",
    "    categorical_features=['Division_Name', 'Department_Name', 'Class_Name'],\n",
    "    text_features=['Review_Text', 'Title'],\n",
    "    task='regression'\n",
    "  )\n",
    ")\n",
    "regression_report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 104
    },
    "id": "3bucSBrnaquV",
    "outputId": "6bd8b91b-1422-4efa-dbda-b6f309fc9b98"
   },
   "outputs": [],
   "source": [
    "regression_report.json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "id": "YViVD_VA20OC",
    "outputId": "16dd9375-737a-4aab-80b7-b5d77224724c"
   },
   "outputs": [],
   "source": [
    "# Reference and current data are two random samples of the full dataset.\n",
    "# Fixed seeds make the report reproducible after a kernel restart; the seeds differ\n",
    "# so that the two samples are not the exact same rows.\n",
    "classification_report = Report(metrics=[\n",
    "    ClassificationPreset()\n",
    "])\n",
    "classification_report.run(\n",
    "    reference_data=reviews.sample(n=5000, replace=False, random_state=42),\n",
    "    current_data=reviews.sample(n=5000, replace=False, random_state=43),\n",
    "    column_mapping=ColumnMapping(\n",
    "        target='Rating',\n",
    "        prediction='prediction',\n",
    "        numerical_features=['Age', 'Positive_Feedback_Count'],\n",
    "        categorical_features=['Division_Name', 'Department_Name', 'Class_Name'],\n",
    "        text_features=['Review_Text', 'Title'],\n",
    "        task='classification'\n",
    "    )\n",
    ")\n",
    "\n",
    "classification_report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 104
    },
    "id": "PmFW0tMlBdrl",
    "outputId": "d26a46e1-b7f4-4e79-dec6-ae635873e5df"
   },
   "outputs": [],
   "source": [
    "classification_report.json()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "IAe0g1SWlV4L"
   },
   "source": [
    "# Text Overview Preset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "id": "9H8xHPN-tYY8",
    "outputId": "644abbf8-d717-484f-8125-902b99288c59"
   },
   "outputs": [],
   "source": [
    "text_overview_report = Report(metrics=[\n",
    "    TextOverviewPreset(column_name=\"Review_Text\")\n",
    "])\n",
    "\n",
    "text_overview_report.run(reference_data=reviews_ref, current_data=reviews_cur, column_mapping=column_mapping)\n",
    "text_overview_report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 104
    },
    "id": "msjgy3j-f-5i",
    "outputId": "0e86becd-b75d-42f9-d115-72de002f8786"
   },
   "outputs": [],
   "source": [
    "text_overview_report.json()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "eSMsBr3n3Qlb"
   },
   "source": [
    "# General Metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "id": "4Rc1Wbo9tOmX",
    "outputId": "06c2a357-4690-4b8b-bdf6-a294bc91e5e0"
   },
   "outputs": [],
   "source": [
    "dataset_metrics_with_text_report = Report(metrics=[\n",
    "    DatasetSummaryMetric(), \n",
    "    DatasetMissingValuesMetric(),\n",
    "    DatasetCorrelationsMetric(), \n",
    "    ConflictTargetMetric(),\n",
    "    ConflictPredictionMetric(),\n",
    "    DatasetDriftMetric(),\n",
    "    DataDriftTable(),\n",
    "    TargetByFeaturesTable(columns=['Review_Text', 'Title']),\n",
    "    ClassificationQualityByFeatureTable(), \n",
    "])\n",
    "\n",
    "dataset_metrics_with_text_report.run(reference_data=reviews_ref, current_data=reviews_cur, column_mapping=column_mapping)\n",
    "dataset_metrics_with_text_report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "hobJomPJk61L"
   },
   "outputs": [],
   "source": [
    "dataset_metrics_with_text_report.json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "id": "HQ5gagdH396s",
    "outputId": "fac316f5-91d8-4d96-9d10-fd0399e26b4b"
   },
   "outputs": [],
   "source": [
    "# Summary, missing-value, regexp and drift metrics for each of the two text columns.\n",
    "# The pattern `.*love.*` is meant to flag values that mention 'love': the `.*` on each\n",
    "# side allows any surrounding text (in `love*.` the `*` would apply only to the letter 'e').\n",
    "column_metrics_with_text_report = Report(metrics=[\n",
    "    ColumnSummaryMetric(column_name=\"Review_Text\"),\n",
    "    ColumnMissingValuesMetric(column_name=\"Review_Text\"),\n",
    "    ColumnRegExpMetric(column_name=\"Review_Text\", reg_exp=r'.*love.*'),\n",
    "    ColumnDriftMetric(column_name=\"Review_Text\"),\n",
    "    ColumnSummaryMetric(column_name=\"Title\"),\n",
    "    ColumnMissingValuesMetric(column_name=\"Title\"),\n",
    "    ColumnRegExpMetric(column_name=\"Title\", reg_exp=r'.*love.*'),\n",
    "    ColumnDriftMetric(column_name=\"Title\"),\n",
    "])\n",
    "\n",
    "column_metrics_with_text_report.run(reference_data=reviews_ref, current_data=reviews_cur, column_mapping=column_mapping)\n",
    "column_metrics_with_text_report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 104
    },
    "id": "AbvTrbAbf1Uh",
    "outputId": "7e887d23-cf17-4991-85e7-a3708117d605"
   },
   "outputs": [],
   "source": [
    "column_metrics_with_text_report.json()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "LsQ6xE_UGM_e"
   },
   "source": [
    "# Text Metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "id": "BBJAhFjJtSKh",
    "outputId": "2560a352-3266-4f39-cf36-5c6aa582fe94"
   },
   "outputs": [],
   "source": [
    "text_specific_metrics_report = Report(metrics=[\n",
    "    TextDescriptorsDriftMetric(column_name=\"Review_Text\"),\n",
    "    TextDescriptorsDistribution(column_name=\"Review_Text\"),\n",
    "    TextDescriptorsCorrelationMetric(column_name=\"Review_Text\"),\n",
    "])\n",
    "\n",
    "text_specific_metrics_report.run(reference_data=reviews_ref, current_data=reviews_cur, column_mapping=column_mapping)\n",
    "text_specific_metrics_report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 104
    },
    "id": "kuQPC867gNKo",
    "outputId": "d7af5617-6cf5-4915-8d92-b67e7db8abb8"
   },
   "outputs": [],
   "source": [
    "text_specific_metrics_report.json()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "0nROQDZfG93t"
   },
   "source": [
    "# Test Suites"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "id": "E75w7C6_tUck",
    "outputId": "93909a9d-f22d-4e70-d6a9-ab56412cbc1d"
   },
   "outputs": [],
   "source": [
    "no_target_performance_suite = TestSuite(tests=[\n",
    "    NoTargetPerformanceTestPreset()\n",
    "])\n",
    "\n",
    "no_target_performance_suite.run(reference_data=reviews_ref, current_data=reviews_cur, column_mapping=column_mapping)\n",
    "no_target_performance_suite\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 104
    },
    "id": "DLAaYYuIg_57",
    "outputId": "02e69750-fce2-42a6-81a6-31663fc04678"
   },
   "outputs": [],
   "source": [
    "no_target_performance_suite.json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "id": "1EHX7yVXHeB1",
    "outputId": "c82e3239-5308-47c8-d332-a74122553a4d"
   },
   "outputs": [],
   "source": [
    "data_stability_suite = TestSuite(tests=[\n",
    "    DataStabilityTestPreset()\n",
    "])\n",
    "\n",
    "data_stability_suite.run(reference_data=reviews_ref, current_data=reviews_cur, column_mapping=column_mapping)\n",
    "data_stability_suite"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 104
    },
    "id": "poFqLddyhRv3",
    "outputId": "4bf7dcf9-bdad-4e64-e1fc-e2a91f04887d"
   },
   "outputs": [],
   "source": [
    "data_stability_suite.json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "id": "KjFQSJOPHeLs",
    "outputId": "da417338-8034-4564-f5aa-c73200a9fc7e"
   },
   "outputs": [],
   "source": [
    "data_quality_suite = TestSuite(tests=[\n",
    "    DataQualityTestPreset()\n",
    "])\n",
    "\n",
    "data_quality_suite.run(reference_data=reviews_ref, current_data=reviews_cur, column_mapping=column_mapping)\n",
    "data_quality_suite"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 104
    },
    "id": "ATyuOuZ-hiUl",
    "outputId": "d4e9b7fc-f88c-41c1-93b8-402becc52405"
   },
   "outputs": [],
   "source": [
    "data_quality_suite.json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "id": "U8Bb8Y5-tTo1",
    "outputId": "a6d156bb-1d6d-477f-f180-a9e8f3bc6fdb"
   },
   "outputs": [],
   "source": [
    "data_drift_suite = TestSuite(tests=[\n",
    "    DataDriftTestPreset()\n",
    "])\n",
    "\n",
    "data_drift_suite.run(reference_data=reviews_ref, current_data=reviews_cur, column_mapping=column_mapping)\n",
    "data_drift_suite"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 104
    },
    "id": "ik_Y2VjShsD5",
    "outputId": "abd3e784-6126-48c3-ea6a-90405af62fb2"
   },
   "outputs": [],
   "source": [
    "data_drift_suite.json()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "zzrAfiygOoCd"
   },
   "source": [
    "# Tests with Text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "id": "gtA-nnorOstX",
    "outputId": "7abd17b9-1a6f-49e4-afd0-20ed6124ee96"
   },
   "outputs": [],
   "source": [
    "dataset_tests_with_text_columns_suite = TestSuite(tests=[\n",
    "    TestNumberOfRows(),\n",
    "    TestNumberOfColumns(),\n",
    "    TestNumberOfMissingValues(),\n",
    "    TestShareOfMissingValues(),\n",
    "    TestNumberOfColumnsWithMissingValues(),\n",
    "    TestShareOfColumnsWithMissingValues(),\n",
    "    TestNumberOfRowsWithMissingValues(),\n",
    "    TestShareOfRowsWithMissingValues(),\n",
    "    TestNumberOfDifferentMissingValues(),\n",
    "    TestNumberOfConstantColumns(),\n",
    "    TestNumberOfEmptyRows(),\n",
    "    TestNumberOfEmptyColumns(),\n",
    "    TestNumberOfDuplicatedRows(),\n",
    "    TestNumberOfDuplicatedColumns(),\n",
    "    TestColumnsType(),\n",
    "    TestConflictTarget(),\n",
    "    TestConflictPrediction(),\n",
    "    TestHighlyCorrelatedColumns(),\n",
    "    TestTargetFeaturesCorrelations(),\n",
    "    TestPredictionFeaturesCorrelations(),\n",
    "    TestCorrelationChanges(),\n",
    "    TestNumberOfDriftedColumns(),\n",
    "    TestShareOfDriftedColumns(),\n",
    "])\n",
    "\n",
    "dataset_tests_with_text_columns_suite.run(reference_data=reviews_ref, current_data=reviews_cur, column_mapping=column_mapping)\n",
    "dataset_tests_with_text_columns_suite"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 104
    },
    "id": "gHJzWiBTh0_A",
    "outputId": "0fb11ac5-af1d-49ac-c360-a93ec11fbf33"
   },
   "outputs": [],
   "source": [
    "dataset_tests_with_text_columns_suite.json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "id": "h-MtNvuY9UFV",
    "outputId": "de641580-01df-4fee-b9e4-27e299d7db3f"
   },
   "outputs": [],
   "source": [
    "# Column-level tests applied to the 'Review_Text' text column.\n",
    "# The pattern `.*love.*` is meant to flag values that mention 'love': the `.*` on each\n",
    "# side allows any surrounding text (in `love*.` the `*` would apply only to the letter 'e').\n",
    "text_column_test_suite = TestSuite(tests=[\n",
    "    TestColumnNumberOfMissingValues(column_name='Review_Text'),\n",
    "    TestColumnShareOfMissingValues(column_name='Review_Text'),\n",
    "    TestColumnNumberOfDifferentMissingValues(column_name='Review_Text'),\n",
    "    TestColumnRegExp(column_name='Review_Text', reg_exp=r'.*love.*'),\n",
    "    TestColumnDrift(column_name='Review_Text'),\n",
    "    # the following tests will be adapted to text data later:\n",
    "    TestColumnAllConstantValues(column_name='Review_Text'),\n",
    "    TestColumnAllUniqueValues(column_name='Review_Text'),\n",
    "    TestNumberOfUniqueValues(column_name='Review_Text'),\n",
    "    TestUniqueValuesShare(column_name='Review_Text'),\n",
    "    TestMostCommonValueShare(column_name='Review_Text'),\n",
    "])\n",
    "\n",
    "text_column_test_suite.run(reference_data=reviews_ref, current_data=reviews_cur, column_mapping=column_mapping)\n",
    "text_column_test_suite"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "_QFxsqpslM03"
   },
   "source": [
    "# Generators"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "id": "Ll9D9Q5hljUy",
    "outputId": "cdf80156-e9a3-4631-88b3-c884cbe9f6ea"
   },
   "outputs": [],
   "source": [
    "suite = TestSuite(tests=[generate_column_tests(TestColumnShareOfMissingValues)]) \n",
    "suite.run(reference_data=reviews_ref, current_data=reviews_cur, column_mapping=column_mapping) \n",
    "suite.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 447
    },
    "id": "vJK6xO6qlsqN",
    "outputId": "e34efdb8-e43c-457a-a95a-7a95e85d4d42"
   },
   "outputs": [],
   "source": [
    "suite = TestSuite(tests=[generate_column_tests(TestColumnShareOfMissingValues, columns=\"text\")]) \n",
    "suite.run(reference_data=reviews_ref, current_data=reviews_cur, column_mapping=column_mapping) \n",
    "suite.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "id": "uW0CDNQAlyGk",
    "outputId": "372c494d-90e9-4ab5-e792-7e09a136d25c"
   },
   "outputs": [],
   "source": [
    "report = Report(\n",
    "    metrics=[generate_column_metrics(ColumnDriftMetric, columns=\"text\")]\n",
    ")\n",
    "report.run(reference_data=reviews_ref, current_data=reviews_cur, column_mapping=column_mapping)\n",
    "report"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
